# Online datasets can be conveniently downloaded locally using the following Python functions:
# Authors: Pan.LC
# Date: 2024/3/18
# License: MIT License


import os
from pooch import retrieve

# Datafile IDs on the Harvard Dataverse server (MATLAB v7 files). Each ID is
# substituted into the datafile access endpoint below to form a download URL,
# so FILES[k] is always the URL for ID_List[k].
ID_List = [
    9865569, 9865577, 9865589, 9865591,
    9865576, 9865586, 9865567, 9865588,
    9865580, 9865570, 9865582, 9865581,
    9865593, 9865573, 9865572, 9865571,
    9865566, 9865575, 9865590, 9865583,
    9865592, 9865585, 9865578, 9865584,
    9865587, 9865574, 9865568, 9865579,
]

# One download URL per entry of ID_List, in the same order.
FILES = [
    f"https://dataverse.harvard.edu/api/access/datafile/{file_id}"
    for file_id in ID_List
]

def load_pan2023_data(subject, base_path=''):
    """Return local paths to one subject's Pan2023 EEG files, downloading any that are missing.

    Each subject has two recordings (day 1 and day 2), stored as
    ``S<subject, zero-padded to 2 digits>D<day>.mat`` directly inside
    ``base_path``. A file that is not already present at that location is
    fetched with ``pooch.retrieve`` from the matching URL in ``FILES``.

    Parameters
    ----------
    subject : int
        Subject number, must be between 1 and 14.
    base_path : str, optional
        Directory where the EEG data files are stored. Defaults to the
        current directory.

    Returns
    -------
    list of str
        Paths to the subject's EEG data files, day 1 first, then day 2.

    Raises
    ------
    ValueError
        If ``subject`` is outside the range 1..14.

    Notes
    -----
    Downloads are requested with ``known_hash=None``, i.e. no checksum is
    supplied to pooch for verifying the downloaded file.
    """
    if not 1 <= subject <= 14:
        raise ValueError(f"Subject must be between 1 and 14. Got {subject}.")

    days = (1, 2)

    # Bare file names and their full local paths are kept separately: the
    # paths are used for the cache check and the return value, while pooch
    # needs the file name and the target directory as two distinct arguments.
    file_names = [f"S{str(subject).zfill(2)}D{day}.mat" for day in days]
    file_paths = [os.path.join(base_path, name) for name in file_names]

    # FILES is indexed as two consecutive URLs per subject (day 1, day 2),
    # so subject 1 uses entries 0-1, subject 2 uses entries 2-3, and so on.
    first_url_index = (subject - 1) * len(days)

    # Download only the files that are not already on disk.
    for offset, (file_name, file_path) in enumerate(zip(file_names, file_paths)):
        if os.path.isfile(file_path):
            continue
        retrieve(
            url=FILES[first_url_index + offset],
            known_hash=None,
            # pooch joins ``path`` and ``fname`` itself. Passing the already
            # joined path as ``fname`` would nest base_path twice
            # (e.g. data/data/S01D1.mat) whenever base_path is relative.
            fname=file_name,
            path=base_path or os.curdir,
            progressbar=True,
        )

    return file_paths